county_fips = read_csv("./data/all-geocodes-v2016.csv", skip = 4) %>%
  janitor::clean_names() %>%
  rename(county_name = area_name_including_legal_statistical_area_description) %>%
  filter(county_code_fips != "000") %>%
  mutate(state_county_fips = paste0(state_code_fips, county_code_fips)) %>%
  select (state_code_fips, county_code_fips, state_county_fips, county_name)
## Parsed with column specification:
## cols(
##   `Summary Level` = col_character(),
##   `State Code (FIPS)` = col_character(),
##   `County Code (FIPS)` = col_character(),
##   `County Subdivision Code (FIPS)` = col_character(),
##   `Place Code (FIPS)` = col_character(),
##   `Consolidtated City Code (FIPS)` = col_character(),
##   `Area Name (including legal/statistical area description)` = col_character()
## )
state_fips = read_csv("./data/state-geocodes-v2016.csv", skip = 5) %>%
  janitor::clean_names() %>%
  filter(state_fips != "00") %>%
  select(state_fips, name) %>%
  mutate(state_fips = as.numeric(state_fips)) %>%
  rename(state = name)
## Parsed with column specification:
## cols(
##   Region = col_double(),
##   Division = col_double(),
##   `State (FIPS)` = col_character(),
##   Name = col_character()
## )
bee_county = read_csv("./data/bee_county/Bee_Colony_Census_Data_by_County 2.csv") %>% 
  janitor::clean_names() %>% 
  select(year, 
         state, 
         state_ansi, 
         ag_district, 
         ag_district_code, 
         county, 
         county_ansi, 
         value, 
         cv_percent) 
## Parsed with column specification:
## cols(
##   Year = col_double(),
##   Period = col_character(),
##   State = col_character(),
##   `State ANSI` = col_double(),
##   `Ag District` = col_character(),
##   `Ag District Code` = col_double(),
##   County = col_character(),
##   `County ANSI` = col_double(),
##   Value = col_character(),
##   `CV (%)` = col_character()
## )
 bee_county$state_ansi = stringr::str_pad(bee_county$state_ansi,2 , pad = "0")
bee_county$county_ansi = stringr::str_pad(bee_county$county_ansi,3 , pad = "0")

bee_county = bee_county%>% 
  mutate(state_county_fips = paste0(state_ansi, county_ansi),
    value = replace(value, value == "(D)", "NA"),
    cv_percent = replace(cv_percent, cv_percent == "(D)", "NA"),
    colony_count = value)
#create function to import data
file_name <- list.files(path = "./data/bee_state_2/") 

df = read_csv(file = str_c("./data/bee_state_2/", file_name[1])) %>%
  mutate(file = file_name[1])
## Parsed with column specification:
## cols(
##   `2` = col_double(),
##   t = col_character(),
##   `Honey: Released February 28, 2002, by the National Agricultural Statistics Service (NASS), Agricultural Statistics Board, U.S. Department of Agriculture.` = col_character(),
##   X4 = col_character(),
##   X5 = col_character(),
##   X6 = col_character(),
##   X7 = col_character(),
##   X8 = col_character(),
##   X9 = col_character()
## )
  df
## # A tibble: 51 x 10
##      `2` t     `Honey: Released ~ X4    X5    X6    X7    X8    X9    file 
##    <dbl> <chr> <chr>              <chr> <chr> <chr> <chr> <chr> <chr> <chr>
##  1     2 t     Honey:  Number of~ <NA>  <NA>  <NA>  <NA>  <NA>  <NA>  2002~
##  2     2 t     and Value by Stat~ <NA>  <NA>  <NA>  <NA>  <NA>  <NA>  2002~
##  3     2 h     <NA>               <NA>  <NA>  <NA>  <NA>  <NA>  <NA>  2002~
##  4     2 h     <NA>               Honey Yield <NA>  <NA>  Aver~ Value 2002~
##  5     2 h     State              Prod~ per   Prod~ Stoc~ Pric~ of    2002~
##  6     2 h     <NA>               Colo~ Colo~ <NA>  Dec ~ Poun~ Prod~ 2002~
##  7     2 h     <NA>               <NA>  <NA>  <NA>  <NA>  <NA>  <NA>  2002~
##  8     2 u     <NA>               1,000 Poun~ 1,00~ 1,00~ Cents 1,00~ 2002~
##  9     2 d     AL                 16    78    1248  187   59    736   2002~
## 10     2 d     AZ                 40    59    2360  1322  73    1723  2002~
## # ... with 41 more rows
my_read_csv = function(x){
    df = read_csv(x, skip = 9, col_names  = F)
}

my_read_csv = function(x){
    df = read_csv(x, skip = 9, col_names  = F)
}
  
bee_data = 
  tibble(
    file_names = file_name,
    path = str_c("./data/bee_state_2/", file_names)
  ) %>% 
  mutate(data = map(path, my_read_csv))%>% 
  unnest()
## Parsed with column specification:
## cols(
##   X1 = col_double(),
##   X2 = col_character(),
##   X3 = col_character(),
##   X4 = col_double(),
##   X5 = col_double(),
##   X6 = col_double(),
##   X7 = col_double(),
##   X8 = col_double(),
##   X9 = col_double()
## )
## Parsed with column specification:
## cols(
##   X1 = col_double(),
##   X2 = col_character(),
##   X3 = col_character(),
##   X4 = col_double(),
##   X5 = col_double(),
##   X6 = col_double(),
##   X7 = col_double(),
##   X8 = col_double(),
##   X9 = col_double()
## )
## Parsed with column specification:
## cols(
##   X1 = col_double(),
##   X2 = col_character(),
##   X3 = col_character(),
##   X4 = col_double(),
##   X5 = col_double(),
##   X6 = col_double(),
##   X7 = col_double(),
##   X8 = col_double(),
##   X9 = col_double()
## )
## Parsed with column specification:
## cols(
##   X1 = col_double(),
##   X2 = col_character(),
##   X3 = col_character(),
##   X4 = col_double(),
##   X5 = col_double(),
##   X6 = col_double(),
##   X7 = col_double(),
##   X8 = col_double(),
##   X9 = col_double()
## )
## Parsed with column specification:
## cols(
##   X1 = col_double(),
##   X2 = col_character(),
##   X3 = col_character(),
##   X4 = col_double(),
##   X5 = col_double(),
##   X6 = col_double(),
##   X7 = col_double(),
##   X8 = col_double(),
##   X9 = col_double()
## )
## Parsed with column specification:
## cols(
##   X1 = col_double(),
##   X2 = col_character(),
##   X3 = col_character(),
##   X4 = col_double(),
##   X5 = col_double(),
##   X6 = col_double(),
##   X7 = col_double(),
##   X8 = col_double(),
##   X9 = col_double()
## )
## Parsed with column specification:
## cols(
##   X1 = col_double(),
##   X2 = col_character(),
##   X3 = col_character(),
##   X4 = col_double(),
##   X5 = col_double(),
##   X6 = col_double(),
##   X7 = col_double(),
##   X8 = col_double(),
##   X9 = col_double()
## )
## Parsed with column specification:
## cols(
##   X1 = col_double(),
##   X2 = col_character(),
##   X3 = col_character(),
##   X4 = col_double(),
##   X5 = col_double(),
##   X6 = col_double(),
##   X7 = col_double(),
##   X8 = col_double(),
##   X9 = col_double()
## )
## Parsed with column specification:
## cols(
##   X1 = col_double(),
##   X2 = col_character(),
##   X3 = col_character(),
##   X4 = col_double(),
##   X5 = col_double(),
##   X6 = col_double(),
##   X7 = col_double(),
##   X8 = col_double(),
##   X9 = col_double()
## )
## Parsed with column specification:
## cols(
##   X1 = col_double(),
##   X2 = col_character(),
##   X3 = col_character(),
##   X4 = col_double(),
##   X5 = col_double(),
##   X6 = col_double(),
##   X7 = col_double(),
##   X8 = col_double(),
##   X9 = col_double()
## )
## Parsed with column specification:
## cols(
##   X1 = col_double(),
##   X2 = col_character(),
##   X3 = col_character(),
##   X4 = col_double(),
##   X5 = col_double(),
##   X6 = col_double(),
##   X7 = col_double(),
##   X8 = col_double(),
##   X9 = col_double()
## )
## Parsed with column specification:
## cols(
##   X1 = col_double(),
##   X2 = col_character(),
##   X3 = col_character(),
##   X4 = col_double(),
##   X5 = col_double(),
##   X6 = col_double(),
##   X7 = col_double(),
##   X8 = col_double(),
##   X9 = col_double()
## )
## Parsed with column specification:
## cols(
##   X1 = col_double(),
##   X2 = col_character(),
##   X3 = col_character(),
##   X4 = col_double(),
##   X5 = col_double(),
##   X6 = col_double(),
##   X7 = col_double(),
##   X8 = col_double(),
##   X9 = col_double()
## )
## Parsed with column specification:
## cols(
##   X1 = col_double(),
##   X2 = col_character(),
##   X3 = col_character(),
##   X4 = col_double(),
##   X5 = col_double(),
##   X6 = col_double(),
##   X7 = col_double(),
##   X8 = col_double(),
##   X9 = col_double()
## )
## Parsed with column specification:
## cols(
##   X1 = col_double(),
##   X2 = col_character(),
##   X3 = col_character(),
##   X4 = col_double(),
##   X5 = col_double(),
##   X6 = col_double(),
##   X7 = col_double(),
##   X8 = col_double(),
##   X9 = col_double()
## )
## Parsed with column specification:
## cols(
##   X1 = col_double(),
##   X2 = col_character(),
##   X3 = col_character(),
##   X4 = col_double(),
##   X5 = col_double(),
##   X6 = col_double(),
##   X7 = col_double(),
##   X8 = col_double(),
##   X9 = col_double()
## )
## Parsed with column specification:
## cols(
##   X1 = col_double(),
##   X2 = col_character(),
##   X3 = col_character(),
##   X4 = col_double(),
##   X5 = col_double(),
##   X6 = col_double(),
##   X7 = col_double(),
##   X8 = col_double(),
##   X9 = col_double()
## )
## Parsed with column specification:
## cols(
##   X1 = col_double(),
##   X2 = col_character(),
##   X3 = col_character(),
##   X4 = col_double(),
##   X5 = col_double(),
##   X6 = col_double(),
##   X7 = col_double(),
##   X8 = col_double(),
##   X9 = col_double()
## )
#clean data set
clean_bee_data =
  bee_data %>%
 separate(file_names, into = c("year", "remove"), sep = ".c") %>%
  select(-remove, -X1, -X2, -path) %>%
  rename(state = X3, honey_producing_colonies = X4, yield_per_colony = X5, production = X6, stocks = X7, price_per_pound = X8, production_value = X9) %>%
  drop_na(state) %>%
  mutate(state = recode(state, AL = "Alabama", AR = "Arkansas", AZ = "Arizona", CA = "California", CO = "Colorado", FL = "Florida", GA = "Georgia", HI = "Hawaii", IA = "Iowa", IL = "Illinois", ID = "Idaho", IN = "Indiana", KS = "Kansas", KY = "Kentucky", LA = "Louisiana", ME = "Maine", MD = "Maryland", MI = "Michigan", MN = "Minnesota", MO = "Missouri", MT = "Montana", MS = "Mississippi", NC = "North Carolina", ND = "North Dakota", NE = "Nebraska", NJ = "New Jersey", NM = "New Mexico", NV = "Nevada", NY = "New York", OH = "Ohio", OK = "Oklahoma", OR = "Oregon", PA = "Pennsylvania", SC = "South Carolina", SD = "South Dakota", TN = "Tennessee", TX = "Texas", UT = "Utah", VA = "Virginia", VT = "Vermont", WA = "Washington", WV = "West Virginia", WI = "Wisconsin", WY = "Wyoming"  ))

clean_bee_data %>%
  ggplot(aes(x = year, y = honey_producing_colonies, color = state)) +
  geom_point()

pest_2002 = read_excel("./data/pesticides_csv/EPest.county.estimates.2002.xlsx") %>% 
  janitor::clean_names() %>% 
  mutate(
    state_fips = state_fips_code,
    county_fips = county_fips_code,
    state_county_fips = paste0(state_fips, county_fips),
    epest_low_kg = round(epest_low_kg),
    epest_high_kg = round(epest_high_kg)) %>%
  select(-state_fips_code, -county_fips_code)
    
pest_2002 %>% 
  group_by(year, state_county_fips, compound) %>% 
  ggplot(aes(x = compound, y = epest_high_kg)) + 
  geom_col() 

unique(pull(pest_2002, compound))
state_bee_fips = full_join(clean_bee_data, state_fips, by = "state")

top_pesticides = read_csv("./data/top_pesticides.csv") %>%
    mutate(state_fips = as.numeric(state_fips))
## Parsed with column specification:
## cols(
##   X1 = col_double(),
##   compound = col_character(),
##   year = col_double(),
##   epest_low_kg = col_double(),
##   epest_high_kg = col_double(),
##   state_fips = col_character(),
##   county_fips = col_character(),
##   state_county_fips = col_character()
## )
state_pest_data = 
  top_pesticides %>%
  group_by(state_fips, year, compound) %>%
  summarize(low = sum(epest_low_kg), high = sum(epest_high_kg)) 
state_bee_yrfips = 
  state_bee_fips %>%
  mutate(yearfips = paste0(year, state_fips))

state_pest_yrfips = 
  state_pest_data %>%
  mutate(yearfips = paste0(year, state_fips))

merged_state_data = full_join(state_bee_yrfips, state_pest_yrfips, by = "yearfips") %>%
  select(year.x, state, state_fips.x, honey_producing_colonies, yield_per_colony, production, compound, low, high) %>%
  drop_na(year.x, state, compound, high)
county_bee_yrfips = 
  bee_county %>%
  mutate(yearfips = paste0(year, state_county_fips))
         
county_pest_yrfips = 
  top_pesticides %>%
  mutate(state_county_fips = as.numeric(state_county_fips) ,
         yearfips = paste0(year, state_county_fips))

merged_county_data = full_join(county_bee_yrfips, county_pest_yrfips, by = "yearfips") %>%
  select(year.x, state, county, county_fips, colony_count, compound, epest_low_kg, epest_high_kg) %>%
  drop_na(year.x, state, county, colony_count, compound, epest_high_kg)
#correlation of state level means across all years

corr_state = read_csv("./data/top_pesticides.csv") %>% 
  group_by(compound, state_fips) %>%
  summarise(mean_pest_high = mean(epest_high_kg, na.rm = TRUE)) %>% 
  pivot_wider(
    names_from = "compound",
    values_from = "mean_pest_high",
  ) %>% 
  select(-state_fips)
## Parsed with column specification:
## cols(
##   X1 = col_double(),
##   compound = col_character(),
##   year = col_double(),
##   epest_low_kg = col_double(),
##   epest_high_kg = col_double(),
##   state_fips = col_character(),
##   county_fips = col_character(),
##   state_county_fips = col_character()
## )
corr_state %>%
  view()

matrix_state_1 = cor(corr_state, use = "everything", method = c("pearson"))
matrix_state_1
##                CHLOROTHALONIL CHLORPYRIFOS CLOTHIANIDIN FIPRONIL
## CHLOROTHALONIL      1.0000000    0.4802217    0.0571605       NA
## CHLORPYRIFOS        0.4802217    1.0000000    0.5316680       NA
## CLOTHIANIDIN        0.0571605    0.5316680    1.0000000       NA
## FIPRONIL                   NA           NA           NA        1
## IMIDACLOPRID        0.4953138    0.8741880    0.3934778       NA
## THIACLOPRID                NA           NA           NA       NA
##                IMIDACLOPRID THIACLOPRID
## CHLOROTHALONIL    0.4953138          NA
## CHLORPYRIFOS      0.8741880          NA
## CLOTHIANIDIN      0.3934778          NA
## FIPRONIL                 NA          NA
## IMIDACLOPRID      1.0000000          NA
## THIACLOPRID              NA           1
matrix_state_2 = cor(corr_state, use = "complete.obs", method = c("pearson"))
matrix_state_2
##                CHLOROTHALONIL CHLORPYRIFOS CLOTHIANIDIN     FIPRONIL
## CHLOROTHALONIL      1.0000000   0.70246745    0.3117681 -0.122623772
## CHLORPYRIFOS        0.7024675   1.00000000    0.5756553  0.001349550
## CLOTHIANIDIN        0.3117681   0.57565529    1.0000000  0.506889870
## FIPRONIL           -0.1226238   0.00134955    0.5068899  1.000000000
## IMIDACLOPRID        0.6541162   0.97497272    0.5498456 -0.003886203
## THIACLOPRID         0.1766534   0.12367034   -0.1679516 -0.056247127
##                IMIDACLOPRID  THIACLOPRID
## CHLOROTHALONIL  0.654116214  0.176653447
## CHLORPYRIFOS    0.974972715  0.123670345
## CLOTHIANIDIN    0.549845616 -0.167951571
## FIPRONIL       -0.003886203 -0.056247127
## IMIDACLOPRID    1.000000000 -0.002073838
## THIACLOPRID    -0.002073838  1.000000000
#correlation of country level mean by year 

corr_country = read_csv("./data/top_pesticides.csv") %>% 
  group_by(compound, year) %>%
  summarise(mean_pest_high = mean(epest_high_kg, na.rm = TRUE)) %>% 
  pivot_wider(
    names_from = "compound",
    values_from = "mean_pest_high",
  ) %>%
  ungroup() %>%
  select(-year)
## Parsed with column specification:
## cols(
##   X1 = col_double(),
##   compound = col_character(),
##   year = col_double(),
##   epest_low_kg = col_double(),
##   epest_high_kg = col_double(),
##   state_fips = col_character(),
##   county_fips = col_character(),
##   state_county_fips = col_character()
## )
matrix_country_1 = cor(corr_country, use = "everything", method = c("pearson"))
matrix_country_1
##                CHLOROTHALONIL CHLORPYRIFOS CLOTHIANIDIN    FIPRONIL
## CHLOROTHALONIL     1.00000000  -0.30707012   0.05973758 -0.59954253
## CHLORPYRIFOS      -0.30707012   1.00000000   0.19330789 -0.02526745
## CLOTHIANIDIN       0.05973758   0.19330789   1.00000000 -0.45653473
## FIPRONIL          -0.59954253  -0.02526745  -0.45653473  1.00000000
## IMIDACLOPRID       0.22768878  -0.14992739   0.87269447 -0.58062939
## THIACLOPRID       -0.01268331  -0.28960638   0.31040782 -0.29736183
##                IMIDACLOPRID THIACLOPRID
## CHLOROTHALONIL    0.2276888 -0.01268331
## CHLORPYRIFOS     -0.1499274 -0.28960638
## CLOTHIANIDIN      0.8726945  0.31040782
## FIPRONIL         -0.5806294 -0.29736183
## IMIDACLOPRID      1.0000000  0.40945350
## THIACLOPRID       0.4094535  1.00000000
matrix_country_2 = cor(corr_country, use = "complete.obs", method = c("pearson"))
matrix_country_2
##                CHLOROTHALONIL CHLORPYRIFOS CLOTHIANIDIN    FIPRONIL
## CHLOROTHALONIL     1.00000000  -0.30707012   0.05973758 -0.59954253
## CHLORPYRIFOS      -0.30707012   1.00000000   0.19330789 -0.02526745
## CLOTHIANIDIN       0.05973758   0.19330789   1.00000000 -0.45653473
## FIPRONIL          -0.59954253  -0.02526745  -0.45653473  1.00000000
## IMIDACLOPRID       0.22768878  -0.14992739   0.87269447 -0.58062939
## THIACLOPRID       -0.01268331  -0.28960638   0.31040782 -0.29736183
##                IMIDACLOPRID THIACLOPRID
## CHLOROTHALONIL    0.2276888 -0.01268331
## CHLORPYRIFOS     -0.1499274 -0.28960638
## CLOTHIANIDIN      0.8726945  0.31040782
## FIPRONIL         -0.5806294 -0.29736183
## IMIDACLOPRID      1.0000000  0.40945350
## THIACLOPRID       0.4094535  1.00000000
#visualization 

corrplot(matrix_state_2, order = "hclust", type = "full")

corrplot(matrix_country_2, order = "hclust", type = "full")

pest_country = read_csv("./data/top_pesticides.csv") %>% 
  group_by(compound, year) %>%
  summarise(mean_pest_high = mean(epest_high_kg, na.rm = TRUE)) %>% 
  ggplot(aes(x = year, y = mean_pest_high, color = compound)) + 
  geom_line() 
## Parsed with column specification:
## cols(
##   X1 = col_double(),
##   compound = col_character(),
##   year = col_double(),
##   epest_low_kg = col_double(),
##   epest_high_kg = col_double(),
##   state_fips = col_character(),
##   county_fips = col_character(),
##   state_county_fips = col_character()
## )
pest_country

pest_state = read_csv("./data/top_pesticides.csv") %>% 
  group_by(year, compound, state_fips) %>%
  summarise(mean_pest_high = mean(epest_high_kg, na.rm = TRUE)) %>% 
  ggplot(aes(x = year, y = mean_pest_high, color = compound)) + 
  geom_line() + 
  facet_grid(~state_fips)
## Parsed with column specification:
## cols(
##   X1 = col_double(),
##   compound = col_character(),
##   year = col_double(),
##   epest_low_kg = col_double(),
##   epest_high_kg = col_double(),
##   state_fips = col_character(),
##   county_fips = col_character(),
##   state_county_fips = col_character()
## )
pest_state

Looking at merged state data

summary(merged_state_data)
##     year.x             state            state_fips.x  
##  Length:2503        Length:2503        Min.   : 1.00  
##  Class :character   Class :character   1st Qu.:19.00  
##  Mode  :character   Mode  :character   Median :31.00  
##                                        Mean   :31.09  
##                                        3rd Qu.:46.00  
##                                        Max.   :56.00  
##                                                       
##  honey_producing_colonies yield_per_colony   production   
##  Min.   :  2.00           Min.   : 19.00   Min.   :   84  
##  1st Qu.:  9.00           1st Qu.: 45.00   1st Qu.:  459  
##  Median : 27.00           Median : 56.00   Median : 1550  
##  Mean   : 61.15           Mean   : 57.93   Mean   : 3906  
##  3rd Qu.: 65.00           3rd Qu.: 68.00   3rd Qu.: 3780  
##  Max.   :510.00           Max.   :124.00   Max.   :46410  
##                                                           
##    compound              low                 high        
##  Length:2503        Min.   :      0.0   Min.   :      0  
##  Class :character   1st Qu.:    742.8   1st Qu.:   1158  
##  Mode  :character   Median :   8517.0   Median :   8213  
##                     Mean   :  62555.0   Mean   :  52131  
##                     3rd Qu.:  48834.5   3rd Qu.:  45541  
##                     Max.   :1466223.0   Max.   :1468227  
##                     NA's   :1435
merged_state_data %>%
  group_by(compound, year.x) %>%
  summarize(mean_colony_count = mean(honey_producing_colonies),
         mean_pest_high_kg = mean(high)) %>%
  select(year.x, compound, mean_colony_count, mean_pest_high_kg) 
## # A tibble: 78 x 4
## # Groups:   compound [6]
##    year.x compound       mean_colony_count mean_pest_high_kg
##    <chr>  <chr>                      <dbl>             <dbl>
##  1 2004   CHLOROTHALONIL              59.5            88469.
##  2 2005   CHLOROTHALONIL              60.1            99930.
##  3 2006   CHLOROTHALONIL              63.2           106147.
##  4 2007   CHLOROTHALONIL              59.6           127449.
##  5 2008   CHLOROTHALONIL              59.1           106847.
##  6 2009   CHLOROTHALONIL              60.4           104185.
##  7 2010   CHLOROTHALONIL              57.9           122511.
##  8 2011   CHLOROTHALONIL              63.1           116520.
##  9 2012   CHLOROTHALONIL              68.1           126409.
## 10 2013   CHLOROTHALONIL              63             115444.
## # ... with 68 more rows
merged_state_data %>%
  group_by(compound, year.x) %>%
  summarize(mean_colony_count = mean(honey_producing_colonies),
         mean_pest_high_kg = mean(high)) %>%
  ggplot(aes(x = mean_pest_high_kg, y = mean_colony_count, color = year.x)) + geom_smooth() + facet_grid(~compound)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

Looking at merged county data #there appear to be problems with the colony count data, it is missing

summary(merged_county_data)
##      year.x        state              county          county_fips       
##  Min.   :2007   Length:20742       Length:20742       Length:20742      
##  1st Qu.:2007   Class :character   Class :character   Class :character  
##  Median :2007   Mode  :character   Mode  :character   Mode  :character  
##  Mean   :2009                                                           
##  3rd Qu.:2012                                                           
##  Max.   :2012                                                           
##                                                                         
##  colony_count         compound          epest_low_kg     
##  Length:20742       Length:20742       Min.   :     0.0  
##  Class :character   Class :character   1st Qu.:     7.0  
##  Mode  :character   Mode  :character   Median :    51.0  
##                                        Mean   :   724.1  
##                                        3rd Qu.:   354.0  
##                                        Max.   :160848.0  
##                                        NA's   :2930      
##  epest_high_kg     
##  Min.   :     0.0  
##  1st Qu.:     9.0  
##  Median :    70.0  
##  Mean   :   802.1  
##  3rd Qu.:   482.0  
##  Max.   :160846.0  
## 
merged_county_data %>%
  mutate(colony_count = as.numeric(colony_count)) %>%
  group_by(compound, year.x) %>%
  summarize(mean_colony_count = mean(colony_count),
         mean_pest_high_kg = mean(epest_high_kg)) %>%
  select(year.x, compound, mean_colony_count, mean_pest_high_kg)
## # A tibble: 12 x 4
## # Groups:   compound [6]
##    year.x compound       mean_colony_count mean_pest_high_kg
##     <dbl> <chr>                      <dbl>             <dbl>
##  1   2007 CHLOROTHALONIL                NA            1930. 
##  2   2012 CHLOROTHALONIL                NA            1694. 
##  3   2007 CHLORPYRIFOS                  NA            1238. 
##  4   2012 CHLORPYRIFOS                  NA            1392. 
##  5   2007 CLOTHIANIDIN                  NA             162. 
##  6   2012 CLOTHIANIDIN                  NA             497. 
##  7   2007 FIPRONIL                      NA              56.5
##  8   2012 FIPRONIL                      NA              28.1
##  9   2007 IMIDACLOPRID                  NA              79.6
## 10   2012 IMIDACLOPRID                  NA             263. 
## 11   2007 THIACLOPRID                   NA              12.3
## 12   2012 THIACLOPRID                   NA              11.7